set.seed(1998)
x <- rnorm(n = 3000, mean = 0.01, sd = 0.02)
# plot(x)
# plot(cumsum(x))

dd <- data.frame(cbind("k" = 1:length(x), "x_t" = x))
ggplot(dd) +
  geom_line(aes(x = k, y = x_t), size = 0.35)

dd <- data.frame(cbind("k" = 1:length(x), "y_t" = cumsum(x)))
ggplot(dd) +
  geom_line(aes(x = k, y = y_t), size = 1, colour = "gold3")

n <- 3000
nu <- 10
mu <- 0.001
vol <- 0.005*2
scaler <- sqrt(vol^2/(nu)*(nu - 2))

set.seed(1998)
x <- mu + scaler*rt(n = n, df = nu)
x <- as.xts(x, order.by = as.Date("2000-01-01") + 1:n)
y <- cumsum(x)

dd <- data.frame(cbind("k" = 1:length(x), "x_t" = x))
ggplot(dd) +
  geom_line(aes(x = k, y = x_t), size = 0.35)

dAll <- seq(from = 0, to = 1, length.out = 11)
fracDiffAll <- xts()
for (i in 1:length(dAll)) {
  d <- dAll[i]
  th <- 1e-4
  
  yDiff <- na.omit(fracDiff_FFD(y, d, th))
  fracDiffAll <- cbind(fracDiffAll, yDiff)
}

colnames(fracDiffAll) <- paste("d =", as.character(dAll))
index(fracDiffAll) <- index(y)
plotTimeSeries(fracDiffAll)

allD <- seq(from = 0, to = 1, length.out = 21)
allADFStatistics <- rep(NA, length(allD))
stationaryTSeries<- rep(FALSE, length(allD))
for (i in 1:length(allD)) {
  tSeriesFracDiff_FFD <- na.omit(fracDiff_FFD(y, allD[i], 1e-4))
  ADFTest <- adf.test(tSeriesFracDiff_FFD)
  
  allADFStatistics[i] <- ADFTest$statistic
  if (ADFTest$p.value <= 0.01) stationaryTSeries[i] <- TRUE
}
  
dataADF <- data.frame(cbind(allD, allADFStatistics, as.factor(stationaryTSeries)))
ggplot(dataADF, aes(x = allD, y = allADFStatistics)) + 
  geom_line(colour = "gold2", size = 1) + 
  geom_point(aes(fill = stationaryTSeries), size = 3, shape = 21, stroke = 0) + 
  scale_fill_manual(values = c("red2", "green3")) +
  xlab("d") + ylab("ADF Statistic") +
  scale_x_continuous(breaks = seq(from = 0, to = 1, length.out = 11)) +
  ggtitle("ADF Test Statistic as a function of d")

IID-Samples

d <- 0.35
# d <- 0.15
th <- 1e-4
nFwd <- 150  # 7 months
set.seed(1998)

totMeanDiff <- totMeanDiffLM <- totMeanRet <- 0

# allwInverse <- list()
# for (i in 1:9) {
#   print(i)
#   allwInverse[[i]] <- getInverseWeights_FFD(i/10, th, n + nFwd)
# }
# 
# saveRDS(allwInverse, file = "allwInverse.Rds")

wInverse <- getInverseWeights_FFD(d, th, n + nFwd)

nIter <- 50
for (i in 1:nIter) {
  # print(paste("Iteracion", i))
  x <- mu + scaler*rt(n = n, df = nu)
  x <- as.xts(x, order.by = as.Date("2000-01-01") + 1:n)
  y <- cumsum(x)
  
  # y <- c(0, 0, 0)
  # for (k in 4:(n + 1)) {
  #   y <- c(y, 0.5 + 0.3*y[k - 1] + 0.4*y[k - 2] + 0.297*y[k - 3]
  #            + rnorm(1, mean = 0, sd = 1.5))
  # }
  # x <- diff(y)
  # x <- as.xts(x, order.by = as.Date("2000-01-01") + 1:n)
  # y <- as.xts(y[-1], order.by = index(x))
  # 
  # print(adf.test(y))
  # plot(y)
  
  # x <- mu + scaler*rt(n = n, df = nu)
  # for (i in 1:length(x)) {
  #   if (mod(i, 100) == 35) {
  #     x[i:(i + 4)] <- rnorm(5, mean = -30*mu, sd = 0.006)
  #     x[(i + 5):(i + 14)] <- rnorm(10, mean = 0, sd = 0.003)
  #     x[(i + 15):(i + 24)] <- rnorm(10, mean = 15*mu, sd = 0.003)
  #   } 
  # }
  # x <- as.xts(x, order.by = as.Date("2000-01-01") + 1:n)
  # y <- cumsum(x)

  yTrn <- y[1:(length(y) - nFwd)]
  yTst <- y[(length(y) - nFwd + 1):length(y)]
  
  yDiffOriginal <- fracDiff_FFD(yTrn, d, th)
  yDiff <- completeFracDiff_FFD(yTrn, yDiffOriginal, d, th)
  
  # _____ Linear Model _____
  yLM <- as.vector(yDiff[(length(yDiff) - 21*6*5):length(yDiff)])
  xLM <- 1:length(yLM)
  dd <- data.frame(xLM, yLM)
  linearMod <- lm(yLM~xLM, data = dd)
  dd <- data.frame("xLM" = (length(xLM) + 1):(length(xLM) + nFwd))
  yDiffLM <- c(yDiff, as.xts(predict(linearMod, dd), 
                             order.by = tail(index(yDiff), 1) + 1:nFwd))
  yIntLM <- inverseFracDiff_FFD(yDiffLM, d, th, w = wInverse)
  # _______________
  
  muDiff <- mean(yDiff[(length(yDiff) - 21*6*5):length(yDiff)])
  yDiff <- c(yDiff, as.xts(rep(muDiff, nFwd), 
                           order.by = tail(index(yDiff), 1) + 1:nFwd))
  
  yDiffOriginal <- c(yDiffOriginal, 
                     as.xts(rep(muDiff, nFwd), 
                            order.by = tail(index(yDiffOriginal), 1) + 1:nFwd))
  
  yInt <- inverseFracDiff_FFD(yDiff, d, th, w = wInverse)
  
  yDiff2 <- fracDiff(yTrn, 1, 1)
  muDiff2 <- mean(yDiff2[(length(yDiff2) - 21*6*5):length(yDiff2)])
  yDiff2 <- c(yDiff2, as.xts(rep(muDiff2, nFwd), 
                             order.by = tail(index(yDiff2), 1) + 1:nFwd))
  yInt2 <- cumsum(yDiff2)
  
  dd <- cbind("y" = y, "yDiff" = yDiffOriginal, "yInt" = yInt, "yIntLM" = yIntLM, 
              "yDiff2" = yDiff2, "yInt2" = yInt2)
  if (i <= 5) print(plotTimeSeries(dd))
  
  dd <- cbind("y" = yTst, "yInt" = yInt[index(yTst)], 
              "yIntLM" = yIntLM[index(yTst)], 
              "yInt2" = yInt2[index(yTst)])
  if (i <= 5) print(plotTimeSeries(dd))
  
  totMeanDiff <- 1/length(yTst)*sum(abs(yTst - yInt[index(yTst)])^2) + totMeanDiff
  totMeanDiffLM <- 1/length(yTst)*sum(abs(yTst - yIntLM[index(yTst)])^2) + totMeanDiffLM
  totMeanRet <- 1/length(yTst)*sum(abs(yTst - yInt2[index(yTst)])^2) + totMeanRet
}

totMeanDiff <- totMeanDiff/nIter
totMeanDiffLM <- totMeanDiffLM/nIter
totMeanRet <- totMeanRet/nIter

print(totMeanDiff)
## [1] 0.05301019
print(totMeanDiffLM)
## [1] 0.01258059
print(totMeanRet)
## [1] 0.007291666

Non-IID Samples (Ver. 1)

# d <- 0.35
d <- 0.15
th <- 1e-4
nFwd <- 150  # 7 months
set.seed(1995)

totMeanDiff <- totMeanDiffLM <- totMeanRet <- 0

wInverse <- getInverseWeights_FFD(d, th, n + nFwd)

x <- mu + scaler*rt(n = n, df = nu)
for (i in 1:length(x)) {
  if (mod(i, 100) == 35) {
    x[i:(i + 4)] <- rnorm(5, mean = -30*mu, sd = 0.006)
    x[(i + 5):(i + 14)] <- rnorm(10, mean = 0, sd = 0.003)
    x[(i + 15):(i + 24)] <- rnorm(10, mean = 15*mu, sd = 0.003)
  }
}
x <- as.xts(x, order.by = as.Date("2000-01-01") + 1:n)
y <- cumsum(x)

plot(x[1:100])

print(adf.test(y))
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y
## Dickey-Fuller = -3.7585, Lag order = 14, p-value = 0.02105
## alternative hypothesis: stationary
stationaryTSeries[] <- FALSE
for (i in 1:length(allD)) {
  tSeriesFracDiff_FFD <- na.omit(fracDiff_FFD(y, allD[i], 1e-4))
  ADFTest <- adf.test(tSeriesFracDiff_FFD)
  
  allADFStatistics[i] <- ADFTest$statistic
  if (ADFTest$p.value <= 0.01) stationaryTSeries[i] <- TRUE
}
  
dataADF <- data.frame(cbind(allD, allADFStatistics, as.factor(stationaryTSeries)))
print(ggplot(dataADF, aes(x = allD, y = allADFStatistics)) + 
        geom_line(colour = "gold2", size = 1) + 
        geom_point(aes(fill = stationaryTSeries), size = 3, shape = 21, stroke = 0) + 
        scale_fill_manual(values = c("red2", "green3")) +
        xlab("d") + ylab("ADF Statistic") +
        scale_x_continuous(breaks = seq(from = 0, to = 1, length.out = 11)) +
        ggtitle("ADF Test Statistic as a function of d"))

nIter <- 50
for (i in 1:nIter) {
  x <- mu + scaler*rt(n = n, df = nu)
  for (k0 in 1:length(x)) {
    if (mod(k0, 100) == 35) {
      x[k0:(k0 + 4)] <- rnorm(5, mean = -30*mu, sd = 0.006)
      x[(k0 + 5):(k0 + 14)] <- rnorm(10, mean = 0, sd = 0.003)
      x[(k0 + 15):(k0 + 24)] <- rnorm(10, mean = 15*mu, sd = 0.003)
    }
  }
  x <- as.xts(x, order.by = as.Date("2000-01-01") + 1:n)
  y <- cumsum(x)

  yTrn <- y[1:(length(y) - nFwd)]
  yTst <- y[(length(y) - nFwd + 1):length(y)]
  
  yDiffOriginal <- fracDiff_FFD(yTrn, d, th)
  yDiff <- completeFracDiff_FFD(yTrn, yDiffOriginal, d, th)
  
  # _____ Linear Model _____
  yLM <- as.vector(yDiff[(length(yDiff) - 21*6*5):length(yDiff)])
  xLM <- 1:length(yLM)
  dd <- data.frame(xLM, yLM)
  linearMod <- lm(yLM~xLM, data = dd)
  dd <- data.frame("xLM" = (length(xLM) + 1):(length(xLM) + nFwd))
  yDiffLM <- c(yDiff, as.xts(predict(linearMod, dd), 
                             order.by = tail(index(yDiff), 1) + 1:nFwd))
  yIntLM <- inverseFracDiff_FFD(yDiffLM, d, th, w = wInverse)
  # _______________
  
  muDiff <- mean(yDiff[(length(yDiff) - 21*6*5):length(yDiff)])
  yDiff <- c(yDiff, as.xts(rep(muDiff, nFwd), 
                           order.by = tail(index(yDiff), 1) + 1:nFwd))
  
  yDiffOriginal <- c(yDiffOriginal, 
                     as.xts(rep(muDiff, nFwd), 
                            order.by = tail(index(yDiffOriginal), 1) + 1:nFwd))
  
  yInt <- inverseFracDiff_FFD(yDiff, d, th, w = wInverse)
  
  yDiff2 <- fracDiff(yTrn, 1, 1)
  muDiff2 <- mean(yDiff2[(length(yDiff2) - 21*6*5):length(yDiff2)])
  yDiff2 <- c(yDiff2, as.xts(rep(muDiff2, nFwd), 
                             order.by = tail(index(yDiff2), 1) + 1:nFwd))
  yInt2 <- cumsum(yDiff2)
  
  dd <- cbind("y" = y, "yDiff" = yDiffOriginal, "yInt" = yInt, "yIntLM" = yIntLM, 
              "yDiff2" = yDiff2, "yInt2" = yInt2)
  if (i <= 5) print(plotTimeSeries(dd))
  
  dd <- cbind("y" = yTst, "yInt" = yInt[index(yTst)], 
              "yIntLM" = yIntLM[index(yTst)], 
              "yInt2" = yInt2[index(yTst)])
  if (i <= 5) print(plotTimeSeries(dd))
  
  totMeanDiff <- 1/length(yTst)*sum(abs(yTst - yInt[index(yTst)])^2) + totMeanDiff
  totMeanDiffLM <- 1/length(yTst)*sum(abs(yTst - yIntLM[index(yTst)])^2) + totMeanDiffLM
  totMeanRet <- 1/length(yTst)*sum(abs(yTst - yInt2[index(yTst)])^2) + totMeanRet
}

totMeanDiff <- totMeanDiff/nIter
totMeanDiffLM <- totMeanDiffLM/nIter
totMeanRet <- totMeanRet/nIter

print(totMeanDiff)
## [1] 0.06859932
print(totMeanDiffLM)
## [1] 0.01691159
print(totMeanRet)
## [1] 0.02652752

Non-IID Samples (Ver. 2)

n <- 3000
mu <- 0.01
vol <- 0.03
period <- 21*6  # 6 months
muSin <- mu*sin(mod(1:n, period)/period*2*pi)

set.seed(1995)
x <- c()
for (i in 1:length(muSin)) {
  x <- c(x, rnorm(n = 1, mean = muSin[i], sd = vol))
}

x <- as.xts(x, order.by = as.Date("2000-01-01") + 1:n)
y <- cumsum(x)

dd <- data.frame(cbind("k" = 1:length(x), "x_t" = x))
ggplot(dd) +
  geom_line(aes(x = k, y = x_t), size = 0.35)

dAll <- seq(from = 0, to = 1, length.out = 11)
fracDiffAll <- xts()
for (i in 1:length(dAll)) {
  d <- dAll[i]
  th <- 1e-4
  
  yDiff <- na.omit(fracDiff_FFD(y, d, th))
  print(adf.test(yDiff))
  fracDiffAll <- cbind(fracDiffAll, yDiff)
}
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yDiff
## Dickey-Fuller = -3.8284, Lag order = 14, p-value = 0.01756
## alternative hypothesis: stationary
## 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yDiff
## Dickey-Fuller = -4.316, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yDiff
## Dickey-Fuller = -4.6916, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yDiff
## Dickey-Fuller = -5.1811, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yDiff
## Dickey-Fuller = -5.8904, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yDiff
## Dickey-Fuller = -6.4821, Lag order = 14, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yDiff
## Dickey-Fuller = -7.1621, Lag order = 14, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yDiff
## Dickey-Fuller = -7.8468, Lag order = 14, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yDiff
## Dickey-Fuller = -8.7142, Lag order = 14, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yDiff
## Dickey-Fuller = -9.7792, Lag order = 14, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yDiff
## Dickey-Fuller = -10.914, Lag order = 14, p-value = 0.01
## alternative hypothesis: stationary
colnames(fracDiffAll) <- paste("d =", as.character(dAll))
index(fracDiffAll) <- index(y)
plotTimeSeries(fracDiffAll)

d <- 0.2
th <- 1e-4
nFwd <- 150  # 1 month
set.seed(1998)

wInverse <- getInverseWeights_FFD(d, th, length(y) + nFwd)

x <- c()
for (i in 1:length(muSin)) {
  x <- c(x, rnorm(n = 1, mean = muSin[i], sd = vol))
}

x <- as.xts(x, order.by = as.Date("2000-01-01") + 1:n)
y <- cumsum(x)

yTrn <- y[1:(length(y) - nFwd)]
yTst <- y[(length(y) - nFwd + 1):length(y)]

yDiffOriginal <- fracDiff_FFD(yTrn, d, th)
yDiff <- completeFracDiff_FFD(yTrn, yDiffOriginal, d, th)

# _____ Linear Model _____
yLM <- as.vector(yDiff[(length(yDiff) - 21*6):length(yDiff)])
xLM <- 1:length(yLM)
dd <- data.frame(xLM, yLM)
linearMod <- lm(yLM~xLM, data = dd)
dd <- data.frame("xLM" = (length(xLM) + 1):(length(xLM) + nFwd))
yDiffLM <- c(yDiff, as.xts(predict(linearMod, dd), 
                           order.by = tail(index(yDiff), 1) + 1:nFwd))
yIntLM <- inverseFracDiff_FFD(yDiffLM, d, th, w = wInverse)
# _______________

muDiff <- mean(yDiff[(length(yDiff) - 21*6):length(yDiff)])
yDiff <- c(yDiff, as.xts(rep(muDiff, nFwd), 
                         order.by = tail(index(yDiff), 1) + 1:nFwd))

yDiffOriginal <- c(yDiffOriginal, 
                   as.xts(rep(muDiff, nFwd), 
                          order.by = tail(index(yDiffOriginal), 1) + 1:nFwd))

yInt <- inverseFracDiff_FFD(yDiff, d, th, w = wInverse)

yDiff2 <- fracDiff(yTrn, 1, 1)
muDiff2 <- mean(yDiff2[(length(yDiff2) - 21*6):length(yDiff2)])
yDiff2 <- c(yDiff2, as.xts(rep(muDiff2, nFwd), 
                           order.by = tail(index(yDiff2), 1) + 1:nFwd))
yInt2 <- cumsum(yDiff2)

dd <- cbind("y" = y, "yDiff" = yDiffOriginal, "yInt" = yInt, "yIntLM" = yIntLM, 
            "yDiff2" = yDiff2, "yInt2" = yInt2)
print(plotTimeSeries(dd))

dd <- cbind("y" = yTst, "yInt" = yInt[index(yTst)], 
            "yIntLM" = yIntLM[index(yTst)], 
            "yInt2" = yInt2[index(yTst)])
print(plotTimeSeries(dd))

print(1/length(yTst)*sum(abs(yTst - yInt[index(yTst)])^2))
## [1] 0.115185
print(1/length(yTst)*sum(abs(yTst - yIntLM[index(yTst)])^2))
## [1] 0.7207504
print(1/length(yTst)*sum(abs(yTst - yInt2[index(yTst)])^2))
## [1] 0.2107852